## Loading required package: ggplot2
## Loading required package: grid
## Loading required package: gridExtra
## Loading required package: reshape2
## Loading required package: ROCR
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
## 
## Loading required package: plyr
## Loading required package: stringr
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
# Columns of the generated example frames that are kept out of the predictor
# list; yName is the column the models below are fit to and scored against.
yName <- "yNumeric"
yVars <- c("yCat", yName)

# Fix the RNG state before the variable plans are built.
set.seed(232567)

# vplan1: a single designed variable "x1".
# vplan2: that same designed variable followed by a noise variable "n1".
# NOTE(review): the numeric argument (10 / 500) is presumably the number of
# levels of each variable -- confirm against designVar()/designNoiseVar().
vplan1 <- list(designVar("x1", 10))
vplan2 <- c(vplan1, list(designNoiseVar("n1", 500)))

# One entry per experiment: the variable plan used to generate data, the
# fitting function applied to the coded frames, and a label used in printed
# output and plot titles.
experiments <- list(
  list(
    vplan = vplan1,
    fnFitter = lrFitter,
    eName = "one variable, linear regression"
  ),
  list(
    vplan = vplan2,
    fnFitter = lrFitter,
    eName = "one variable plus noise variable, linear regression"
  ),
  list(
    vplan = vplan1,
    fnFitter = dFitter,
    eName = "one variable, diagonal regression"
  ),
  list(
    vplan = vplan2,
    fnFitter = dFitter,
    eName = "one variable plus noise variable, diagonal regression"
  )
)


for(expmt in experiments) {
  # Reset the RNG so each experiment starts from the same seed, then pull
  # this experiment's settings out of its list entry.
  set.seed(232567)
  vplan <- expmt[["vplan"]]
  fnFitter <- expmt[["fnFitter"]]
  eName <- expmt[["eName"]]
  
  print("*************************************************************")
  print(eName)
  
  # Three separate draws from the same variable plan, made in this order:
  #   dTrain - 2000 rows, used to train the coders and fit the models
  #   dCal   - 10000 rows, used only to pick sigma
  #   dTest  - 10000 rows, held out for the reported test scores
  dTrain <- generateExample(vplan, 2000)
  dCal <- generateExample(vplan, 10000)
  dTest <- generateExample(vplan, 10000)
  # Everything in the generated frame that is not an outcome column.
  vars <- setdiff(colnames(dTrain), yVars)
  
  
  # Pick the effects-coder noise level sigma by scoring a grid of candidates
  # on calibration data.
  #
  #   cl      parallel cluster used to evaluate the grid, or NULL to evaluate
  #           the candidates one after another in this process
  #   yName   name of the outcome column
  #   yVars   column names to exclude from the coded predictor list
  #   dTrain  frame the coder is trained on and the model is fit to
  #   vars    names of the variables handed to the effects coder
  #   dCal    frame the fitted model is scored on
  #
  # Returns the candidate sigma (0, 1, ..., 40) whose mean calibration RMSE
  # over three repetitions is smallest; on ties the smallest sigma wins, and
  # 0 is returned if no candidate scores below Inf.
  findSigma <- function(cl,
                        yName,
                        yVars,
                        dTrain,
                        vars,
                        dCal) {
    # Factory for the per-candidate scoring function. It is kept as its own
    # function so the returned closure captures only these five values plus
    # whatever bindToEnv() attaches.
    mkWorker1 <- function(yName,
                          yVars,
                          dTrain,
                          vars,
                          dCal) {
      # Evaluate every argument now rather than lazily inside the closure.
      force(yName)
      force(yVars)
      force(dTrain)
      force(vars)
      force(dCal)
      # NOTE(review): presumably makes the sourced helpers and fnFitter
      # reachable from the closure on cluster workers -- confirm against
      # bindToEnv's definition.
      bindToEnv(objNames=sourcedFns,
                fnFitter)
      function(sigma) {
        nTrials <- 3
        trialScores <- numeric(nTrials)
        for (trial in seq_len(nTrials)) {
          coder <- trainEffectCoderR(dTrain, yName, vars, sigma)
          codedTrain <- coder$codeFrameR(dTrain)
          codedCal <- coder$codeFrameR(dCal)
          codedVars <- setdiff(colnames(codedTrain), yVars)
          fit <- fnFitter(yName, codedVars, codedTrain, codedCal)
          codedCal$pred <- fit$appPred
          trialScores[[trial]] <- rmse(codedCal$pred, codedCal[[yName]])
        }
        list(scoreB = mean(trialScores), sigma = sigma)
      }
    }

    # Candidate grid 0..40, kept as doubles.
    sigmaTargets <- as.numeric(0:40)
    worker <- mkWorker1(yName,
                        yVars,
                        dTrain,
                        vars,
                        dCal)
    results <- if (is.null(cl)) {
      lapply(sigmaTargets, worker)
    } else {
      parallel::parLapplyLB(cl, sigmaTargets, worker)
    }

    # Linear scan for the lowest score; the strict comparison keeps the
    # earliest (smallest) sigma when scores tie.
    winningSigma <- 0
    lowestScore <- Inf
    for (candidate in results) {
      if (candidate$scoreB < lowestScore) {
        lowestScore <- candidate$scoreB
        winningSigma <- candidate$sigma
      }
    }
    winningSigma
  }
  
  # Choose sigma for this experiment's data; the candidate grid is evaluated
  # on the cluster `cl` (created outside the code shown here).
  bSigmaBest <- findSigma(cl, yName, yVars, dTrain, vars, dCal)

  print(paste("bSigmaBest", bSigmaBest))
  
  
  
  # Baseline: effect-code with sigma = 0, fit, then report RMSE and a
  # prediction-versus-outcome plot for the training and the test frame.
  print("naive effects model")
  bCoder <- trainEffectCoderR(dTrain, yName, vars, 0)
  dTrainB <- bCoder$codeFrameR(dTrain)
  dTestB <- bCoder$codeFrameR(dTest)
  varsB <- setdiff(colnames(dTrainB), yVars)
  preds <- fnFitter(yName, varsB, dTrainB, dTestB, verbose = TRUE)

  # Training-frame results.
  dTrainB$pred <- preds$trainPred
  print(paste("train rmse", rmse(dTrainB$pred, dTrainB[[yName]])))
  print(WVPlots::ScatterHist(
    dTrainB, "pred", yName,
    paste0(eName, "\n", "naive effects model train"),
    smoothmethod = "lm", annot_size = 2
  ))

  # Held-out test-frame results.
  dTestB$pred <- preds$appPred
  print(paste("test rmse", rmse(dTestB$pred, dTestB[[yName]])))
  print(WVPlots::ScatterHist(
    dTestB, "pred", yName,
    paste0(eName, "\n", "naive effects model test"),
    smoothmethod = "lm", annot_size = 2
  ))
  
  
  # Same report as the baseline, but the coder is trained with the sigma
  # selected on the calibration frame.
  print(paste("effects model, sigma=", bSigmaBest))
  bCoder <- trainEffectCoderR(dTrain, yName, vars, bSigmaBest)
  dTrainB <- bCoder$codeFrameR(dTrain)
  dTestB <- bCoder$codeFrameR(dTest)
  varsB <- setdiff(colnames(dTrainB), yVars)
  preds <- fnFitter(yName, varsB, dTrainB, dTestB, verbose = TRUE)

  # Training-frame results.
  dTrainB$pred <- preds$trainPred
  print(paste("train rmse", rmse(dTrainB$pred, dTrainB[[yName]])))
  print(WVPlots::ScatterHist(
    dTrainB, "pred", yName,
    paste(eName, "\neffects model train, sigma=", bSigmaBest),
    smoothmethod = "lm", annot_size = 2
  ))

  # Held-out test-frame results.
  dTestB$pred <- preds$appPred
  print(paste("test rmse", rmse(dTestB$pred, dTestB[[yName]])))
  print(WVPlots::ScatterHist(
    dTestB, "pred", yName,
    paste(eName, "\neffects model test, sigma=", bSigmaBest),
    smoothmethod = "lm", annot_size = 2
  ))
  
  
  # Jackknifed variant: the test frame is coded by a sigma = 0 coder, while
  # the training frame is coded by jackknifeEffectCodeR() instead of by that
  # coder (earlier experiments that coded dTrain or dCal through the coder
  # directly are no longer used).
  # NOTE(review): presumably the jackknife codes each training row without
  # using that row's own outcome -- confirm against jackknifeEffectCodeR.
  print("effects model, jacknifed")
  bCoder <- trainEffectCoderR(dTrain, yName, vars, 0)
  dTrainB <- jackknifeEffectCodeR(dTrain, yName, vars)
  dTestB <- bCoder$codeFrameR(dTest)
  varsB <- setdiff(colnames(dTrainB), yVars)
  preds <- fnFitter(yName, varsB, dTrainB, dTestB, verbose = TRUE)

  # Training-frame results.
  dTrainB$pred <- preds$trainPred
  print(paste("train rmse", rmse(dTrainB$pred, dTrainB[[yName]])))
  print(WVPlots::ScatterHist(
    dTrainB, "pred", yName,
    paste0(eName, "\n", "effects model train, jackknifed"),
    smoothmethod = "lm", annot_size = 2
  ))

  # Held-out test-frame results.
  dTestB$pred <- preds$appPred
  print(paste("test rmse", rmse(dTestB$pred, dTestB[[yName]])))
  print(WVPlots::ScatterHist(
    dTestB, "pred", yName,
    paste0(eName, "\n", "effects model test, jackknifed"),
    smoothmethod = "lm", annot_size = 2
  ))
  
  
  # Build the function that runs one full replicate of the experiment.
  #
  #   vplan  variable plan handed to generateExample() for every data draw
  #
  # Returns a function(repID) that draws fresh train / calibration / test
  # frames, fits the naive, noised and jackknifed effects models, and returns
  # a three-row data.frame (one row per model, in that order) with columns
  # repID, bSigmaBest, what, trainRMSE and testRMSE. bSigmaBest is NA for the
  # naive and jackknifed rows.
  mkExpmtRunner <- function(vplan) {
    force(vplan)
    # NOTE(review): presumably makes the sourced helpers, findSigma and
    # fnFitter reachable from the returned closure on cluster workers --
    # confirm against bindToEnv's definition.
    bindToEnv(objNames=sourcedFns,
              findSigma,
              sourcedFns,
              fnFitter)
    function(repID) {
      # Outcome column names are set locally rather than read from globals.
      target <- "yNumeric"
      outcomeCols <- c("yCat", target)

      # Fresh data for this replicate, drawn in train / calibration / test
      # order.
      trainSet <- generateExample(vplan, 2000)
      inputVars <- setdiff(colnames(trainSet), outcomeCols)
      calSet <- generateExample(vplan, 10000)
      testSet <- generateExample(vplan, 10000)

      # Fit on already-coded frames and return one result row holding the
      # train and test RMSE.
      scoreFit <- function(label, sigmaUsed, codedTrain, codedTest) {
        codedVars <- setdiff(colnames(codedTrain), outcomeCols)
        fit <- fnFitter(target, codedVars, codedTrain, codedTest)
        codedTrain$pred <- fit$trainPred
        trainErr <- rmse(codedTrain$pred, codedTrain[[target]])
        codedTest$pred <- fit$appPred
        testErr <- rmse(codedTest$pred, codedTest[[target]])
        data.frame(repID = repID,
                   bSigmaBest = sigmaUsed,
                   what = label,
                   trainRMSE = trainErr,
                   testRMSE = testErr,
                   stringsAsFactors = FALSE)
      }

      # Naive model: coder trained with sigma = 0 codes both frames.
      naiveCoder <- trainEffectCoderR(trainSet, target, inputVars, 0)
      naiveTrain <- naiveCoder$codeFrameR(trainSet)
      naiveTest <- naiveCoder$codeFrameR(testSet)
      naiveRow <- scoreFit("NaiveModel", NA, naiveTrain, naiveTest)

      # Noised model: sigma is chosen on the calibration frame, evaluated
      # sequentially (no cluster is passed to findSigma here).
      pickedSigma <- findSigma(NULL,
                               target,
                               outcomeCols,
                               trainSet,
                               inputVars,
                               calSet)
      noisedCoder <- trainEffectCoderR(trainSet, target, inputVars, pickedSigma)
      noisedTrain <- noisedCoder$codeFrameR(trainSet)
      noisedTest <- noisedCoder$codeFrameR(testSet)
      noisedRow <- scoreFit("NoisedModel", pickedSigma, noisedTrain, noisedTest)

      # Jackknifed model: training frame coded by jackknifeEffectCodeR(),
      # test frame coded by a sigma = 0 coder.
      plainCoder <- trainEffectCoderR(trainSet, target, inputVars, 0)
      jackTrain <- jackknifeEffectCodeR(trainSet, target, inputVars)
      jackTest <- plainCoder$codeFrameR(testSet)
      jackRow <- scoreFit("JackknifeModel", NA, jackTrain, jackTest)

      rbind(naiveRow, noisedRow, jackRow)
    }
  }
  
  # Run 200 replicates on the cluster and stack the per-replicate frames.
  # NOTE(review): set.seed() in this session does not by itself seed cluster
  # workers; whether `cl` has its own RNG setup is not visible here.
  eworker <- mkExpmtRunner(vplan)
  res <- do.call(rbind, parallel::parLapplyLB(cl, seq_len(200), eworker))

  # Test-RMSE densities, one curve per model type.
  # NOTE(review): the title says "noised model" although all three model
  # types are drawn -- confirm the intended wording.
  print(
    ggplot(data = res, aes(x = testRMSE, color = what)) +
      geom_density(adjust = 0.5, trim = TRUE) +
      ggtitle(paste0(eName, "\n", "test RMSE, noised model"))
  )

  # Per-model five-number summary and standard deviation of test RMSE.
  for (w in sort(unique(res$what))) {
    print("********")
    print(w)
    ri <- res[res$what == w, ]
    print(summary(ri$testRMSE))
    print(sd(ri$testRMSE))
    print("********")
  }
  
  # Pair each replicate's NoisedModel row with its JackknifeModel row (self
  # join of `res` on repID), giving one row per replicate with:
  #   repID            replicate id
  #   NrmseMinusJrmse  noised test RMSE minus jackknifed test RMSE
  #   nTestRMSE        noised-model test RMSE
  #   jTestRMSE        jackknifed-model test RMSE
  #   bSigmaBest       sigma selected for the noised model
  #
  # The model names are written as single-quoted SQL string literals. In
  # standard SQL a double-quoted token is an identifier; SQLite only treats
  # it as a string through a legacy fallback when no such column exists, so
  # the double-quoted form is fragile.
  #
  # The result keeps the name `rm` (which masks base::rm as a variable)
  # because the plotting code that follows refers to it by that name.
  rm <- sqldf("
   SELECT
      rJ.repID,
      rN.testRMSE - rJ.testRMSE AS NrmseMinusJrmse,
      rN.testRMSE AS nTestRMSE,
      rJ.testRMSE AS jTestRMSE,
      rN.bSigmaBest
   FROM
      res rJ
   JOIN
      res rN
   ON
      rJ.repID = rN.repID
   WHERE
      rJ.what = 'JackknifeModel' AND
      rN.what = 'NoisedModel'
")
  
  # Density of the per-replicate gap: noised test RMSE minus jackknifed
  # test RMSE.
  print(
    ggplot(data = rm, aes(x = NrmseMinusJrmse)) +
      geom_density(adjust = 0.5, trim = TRUE) +
      ggtitle(paste0(eName, "\n",
                     "noise test RMSE minus jackknife test RMSE"))
  )

  # Density of the sigma values selected across replicates.
  print(
    ggplot(data = res[res$what == "NoisedModel", ], aes(x = bSigmaBest)) +
      geom_density(adjust = 0.5) +
      ggtitle(eName)
  )

  # Replicate-by-replicate comparison of the two models' test RMSE; the
  # y = x line marks equal performance.
  print(
    ggplot(data = rm, aes(x = nTestRMSE, y = jTestRMSE)) +
      geom_point() +
      geom_abline(slope = 1, intercept = 0) +
      coord_fixed() +
      ggtitle(paste0(
        eName, "\n",
        "noised model performance versus jackknifed model performance"
      ))
  )

  # Selected sigma against the noised-minus-jackknifed RMSE gap.
  print(WVPlots::ScatterHist(
    rm, "bSigmaBest", "NrmseMinusJrmse",
    paste0(eName, "\n", "sigma selected versus delta performance"),
    smoothmethod = "lm", annot_size = 2
  ))

  # Selected sigma against the noised model's own test RMSE.
  print(WVPlots::ScatterHist(
    rm, "bSigmaBest", "nTestRMSE",
    paste0(eName, "\n", "sigma selected versus performance"),
    smoothmethod = "lm", annot_size = 2
  ))
  print("*************************************************************")
}
## [1] "*************************************************************"
## [1] "one variable, linear regression"
## [1] "bSigmaBest 1"
## [1] "naive effects model"
## 
## Call:
## lm(formula = formulaL, data = trainData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3721 -0.6891 -0.0037  0.6848  3.7826 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.20623    0.02260   9.125   <2e-16 ***
## x1           1.00000    0.03685  27.137   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.011 on 1998 degrees of freedom
## Multiple R-squared:  0.2693, Adjusted R-squared:  0.269 
## F-statistic: 736.4 on 1 and 1998 DF,  p-value: < 2.2e-16
## 
## [1] "train rmse 1.01025938596012"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                grob
## 1 1 (2-2,1-1) arrange      gtable[layout]
## 2 2 (2-2,2-2) arrange      gtable[layout]
## 3 3 (3-3,1-1) arrange      gtable[layout]
## 4 4 (3-3,2-2) arrange      gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.140]
## [1] "test rmse 0.999915402747535"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                grob
## 1 1 (2-2,1-1) arrange      gtable[layout]
## 2 2 (2-2,2-2) arrange      gtable[layout]
## 3 3 (3-3,1-1) arrange      gtable[layout]
## 4 4 (3-3,2-2) arrange      gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.293]
## [1] "effects model, sigma= 1"
## 
## Call:
## lm(formula = formulaL, data = trainData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3750 -0.6883 -0.0014  0.6870  3.7847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.20624    0.02260   9.125   <2e-16 ***
## x1           1.00213    0.03693  27.135   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.011 on 1998 degrees of freedom
## Multiple R-squared:  0.2693, Adjusted R-squared:  0.2689 
## F-statistic: 736.3 on 1 and 1998 DF,  p-value: < 2.2e-16
## 
## [1] "train rmse 1.01028030952866"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                grob
## 1 1 (2-2,1-1) arrange      gtable[layout]
## 2 2 (2-2,2-2) arrange      gtable[layout]
## 3 3 (3-3,1-1) arrange      gtable[layout]
## 4 4 (3-3,2-2) arrange      gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.446]
## [1] "test rmse 1.00016906466239"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                grob
## 1 1 (2-2,1-1) arrange      gtable[layout]
## 2 2 (2-2,2-2) arrange      gtable[layout]
## 3 3 (3-3,1-1) arrange      gtable[layout]
## 4 4 (3-3,2-2) arrange      gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.599]
## [1] "effects model, jacknifed"
## 
## Call:
## lm(formula = formulaL, data = trainData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3933 -0.6946 -0.0039  0.6875  3.7985 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.2062     0.0227   9.084   <2e-16 ***
## x1            0.9871     0.0370  26.682   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.015 on 1998 degrees of freedom
## Multiple R-squared:  0.2627, Adjusted R-squared:  0.2623 
## F-statistic:   712 on 1 and 1998 DF,  p-value: < 2.2e-16
## 
## [1] "train rmse 1.01481235978284"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                grob
## 1 1 (2-2,1-1) arrange      gtable[layout]
## 2 2 (2-2,2-2) arrange      gtable[layout]
## 3 3 (3-3,1-1) arrange      gtable[layout]
## 4 4 (3-3,2-2) arrange      gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.752]
## [1] "test rmse 1.00008428967326"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).

## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                grob
## 1 1 (2-2,1-1) arrange      gtable[layout]
## 2 2 (2-2,2-2) arrange      gtable[layout]
## 3 3 (3-3,1-1) arrange      gtable[layout]
## 4 4 (3-3,2-2) arrange      gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.905]

## [1] "********"
## [1] "JackknifeModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.9848  0.9969  1.0020  1.0020  1.0070  1.0190 
## [1] 0.007180245
## [1] "********"
## [1] "********"
## [1] "NaiveModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.9846  0.9968  1.0020  1.0020  1.0070  1.0190 
## [1] 0.007190209
## [1] "********"
## [1] "********"
## [1] "NoisedModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.9848  0.9971  1.0020  1.0020  1.0070  1.0240 
## [1] 0.007258813
## [1] "********"

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.1266]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.1419]
## [1] "*************************************************************"
## [1] "*************************************************************"
## [1] "one variable plus noise variable, linear regression"
## [1] "bSigmaBest 8"
## [1] "naive effects model"
## 
## Call:
## lm(formula = formulaL, data = trainData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9216 -0.6181  0.0055  0.6225  3.5298 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.20622    0.02058   10.02   <2e-16 ***
## x1           0.83459    0.03452   24.17   <2e-16 ***
## n1           0.78131    0.03844   20.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9203 on 1997 degrees of freedom
## Multiple R-squared:  0.3946, Adjusted R-squared:  0.394 
## F-statistic: 650.8 on 2 and 1997 DF,  p-value: < 2.2e-16
## 
## [1] "train rmse 0.919591353886876"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.1572]
## [1] "test rmse 1.12246743812363"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.1725]
## [1] "effects model, sigma= 8"
## 
## Call:
## lm(formula = formulaL, data = trainData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4002 -0.6792 -0.0085  0.6804  3.6877 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.207817   0.022622   9.186  < 2e-16 ***
## x1          1.001711   0.036979  27.088  < 2e-16 ***
## n1          0.011005   0.003316   3.319  0.00092 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.009 on 1997 degrees of freedom
## Multiple R-squared:  0.2725, Adjusted R-squared:  0.2718 
## F-statistic: 374.1 on 2 and 1997 DF,  p-value: < 2.2e-16
## 
## [1] "train rmse 1.00803892027929"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.1878]
## [1] "test rmse 1.01215427910968"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.2031]
## [1] "effects model, jacknifed"
## 
## Call:
## lm(formula = formulaL, data = trainData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3986 -0.6920 -0.0077  0.6877  3.8126 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.20643    0.02268   9.101   <2e-16 ***
## x1           0.98425    0.03698  26.614   <2e-16 ***
## n1          -0.07739    0.03479  -2.224   0.0262 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.014 on 1997 degrees of freedom
## Multiple R-squared:  0.2645, Adjusted R-squared:  0.2638 
## F-statistic: 359.2 on 2 and 1997 DF,  p-value: < 2.2e-16
## 
## [1] "train rmse 1.01355772650768"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.2184]
## [1] "test rmse 1.00913108707443"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).

## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.2337]

## [1] "********"
## [1] "JackknifeModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.9851  0.9982  1.0030  1.0030  1.0080  1.0210 
## [1] 0.006852836
## [1] "********"
## [1] "********"
## [1] "NaiveModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.096   1.126   1.134   1.135   1.143   1.172 
## [1] 0.01407833
## [1] "********"
## [1] "********"
## [1] "NoisedModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.9888  1.0030  1.0080  1.0080  1.0120  1.0450 
## [1] 0.008048595
## [1] "********"

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.2698]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.2851]
## [1] "*************************************************************"
## [1] "*************************************************************"
## [1] "one variable, diagonal regression"
## [1] "bSigmaBest 13"
## [1] "naive effects model"
##       x1 
## 1.000005 
## [1] "train rmse 1.03109338373284"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3004]
## [1] "test rmse 1.02233899515915"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3157]
## [1] "effects model, sigma= 13"
##       x1 
## 1.007208 
## [1] "train rmse 1.03387579088062"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3310]
## [1] "test rmse 1.02796779024538"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3463]
## [1] "effects model, jacknifed"
##        x1 
## 0.9871528 
## [1] "train rmse 1.03555476179036"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3616]
## [1] "test rmse 1.02246501285093"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).

## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3769]

## [1] "********"
## [1] "JackknifeModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.002   1.015   1.020   1.020   1.025   1.038 
## [1] 0.007704492
## [1] "********"
## [1] "********"
## [1] "NaiveModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.001   1.015   1.020   1.020   1.025   1.038 
## [1] 0.007740295
## [1] "********"
## [1] "********"
## [1] "NoisedModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.001   1.017   1.022   1.023   1.029   1.053 
## [1] 0.009453189
## [1] "********"

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4130]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4283]
## [1] "*************************************************************"
## [1] "*************************************************************"
## [1] "one variable plus noise variable, diagonal regression"
## [1] "bSigmaBest 19"
## [1] "naive effects model"
##       x1       n1 
## 1.000005 1.000333 
## [1] "train rmse 0.958540237968956"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4436]
## [1] "test rmse 1.20618715828122"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4589]
## [1] "effects model, sigma= 19"
##          x1          n1 
## 0.988586923 0.003381102 
## [1] "train rmse 1.03609580082898"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4742]
## [1] "test rmse 1.0389153388548"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4895]
## [1] "effects model, jacknifed"
##         x1         n1 
##  0.9871528 -0.1088369 
## [1] "train rmse 1.03458802692346"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.5048]
## [1] "test rmse 1.03176880530955"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).

## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.5201]

## [1] "********"
## [1] "JackknifeModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.001   1.016   1.022   1.022   1.027   1.044 
## [1] 0.007913222
## [1] "********"
## [1] "********"
## [1] "NaiveModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.178   1.208   1.219   1.220   1.232   1.276 
## [1] 0.01667267
## [1] "********"
## [1] "********"
## [1] "NoisedModel"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.004   1.022   1.027   1.030   1.034   1.145 
## [1] 0.01595532
## [1] "********"

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.5562]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).

## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.5715]
## [1] "*************************************************************"
# Stop the parallel cluster referenced by `cl`, if there is one, then reset
# `cl` to NULL so the stale handle cannot be reused and evaluating this
# cleanup a second time does nothing.
if (!is.null(cl)) {
  parallel::stopCluster(cl = cl)
  cl <- NULL
}